library(tidyverse)

# Load every in-stock ZINC20 2D SMILES file found in the data directory into a
# single table. The files are parsed as space-delimited text and the column
# names are taken from the files' header line (presumably `smiles` and
# `zinc_id`, which later steps of this script refer to -- confirm against the
# data).
zinc20_all_instock <-
  '~/append-ssd/ringtail-test/data/zinc20-all-instock-2d-smi/' |>
  # `pattern` is a regular expression: the dot is escaped so that only names
  # ending in the literal extension ".smi" are selected (an unescaped dot would
  # also accept names such as "foo_smi").
  list.files(pattern = '\\.smi$', full.names = TRUE) |>
  read_delim(delim = ' ')

# Vendor-4 ZINC id tables (in-stock and on-demand, judging by the file names).
# The in-stock file is read with delimiter guessing, the on-demand file as
# comma-separated values.
v4_instock <-
  'virtual_screen_v3/vendor4.in-stock.zincid.csv' |>
  read_delim()

v4_ondemand <-
  'virtual_screen_v3/vendor4.on-demand.zincid.csv' |>
  read_csv()

# Reduce each vendor-4 in-stock id to its first run of digits (as a number),
# keep only the rows that also occur in the ZINC20 in-stock table (natural
# join on all shared columns), and save the joined table as CSV.
v4_instock |>
  mutate(zinc_id = as.double(str_extract(zinc_id, '\\d+'))) |>
  inner_join(y = zinc20_all_instock) |>
  write_csv(file = 'virtual_screen_v3/vendor4.in-stock.zinc.smi.csv')

# Read the saved table back from disk for the following steps.
v4_instock_smi <-
  'virtual_screen_v3/vendor4.in-stock.zinc.smi.csv' |>
  read_csv()

# Write the vendor-4 in-stock compounds as a .smi-style file: drop the `src`
# column, rebuild the textual id as "ZINC" followed by the numeric id
# zero-padded to at least 12 digits, and move `smiles` to the first column.
v4_instock_smi |>
  mutate(src = NULL,
         # Format the id in fixed notation. Padding the number directly would
         # first coerce it with as.character(), which switches to scientific
         # notation for round values (100000 -> "1e+05") and corrupts the id.
         # Missing ids stay NA, as they did with str_pad()/str_c().
         zinc_id = if_else(is.na(zinc_id),
                           NA_character_,
                           sprintf('ZINC%012.0f', as.numeric(zinc_id)))) |>
  relocate(smiles) |>
  write_delim('~/append-ssd/ringtail-test/data/zinc20-vendor4-instock-2d.smi')

# Re-load the vendor-4 in-stock .smi file written to the data directory
# (column names come from its header line).
v4_instock_tranch <-
  '~/append-ssd/ringtail-test/data/zinc20-vendor4-instock-2d.smi' |>
  read_delim()

# Second SMILES list; the file is read without a header line, so the two
# columns are named explicitly.
v4_instock_nouci <-
  'virtual_screen_v3/vendor4.is.nouci.smi' |>
  read_delim(col_names = c('smiles', 'zinc_id'))

# Batch 2: rows of the "nouci" list whose id is not already present in the
# tranche-derived table.
v4_instock_nouci |>
  filter(!is.element(zinc_id, v4_instock_tranch$zinc_id)) |>
  write_delim('~/append-ssd/ringtail-test/data/zinc20-vendor4-instock-2d-b2.smi')

# Vendor-4 in-stock rows whose id appears in neither of the two SMILES tables.
# NOTE(review): this compares the ids of `v4_instock` as read from its file
# with the "ZINC"-prefixed ids of the other tables -- confirm both use the
# same textual format.
v4_instock |>
  filter(
    !is.element(
      zinc_id,
      c(v4_instock_tranch$zinc_id, v4_instock_nouci$zinc_id)
    )
  ) |>
  write_csv('v4_instock_notranch_uci.csv')

# Print the tranche-derived table for inspection.
v4_instock_tranch

# Stack the three SMILES sources (the header-less "aio" file first, then the
# tranche-derived and "nouci" tables), keep the first row seen for each
# zinc_id, and write the merged table as a tab-separated file.
bind_rows(
  read_delim('v4_instock_notranch.aio.smi', col_names = c('smiles', 'zinc_id')),
  v4_instock_tranch,
  v4_instock_nouci
) |>
  distinct(zinc_id, .keep_all = TRUE) |>
  write_tsv('vendor4.in-stock.smi')

# Look up the vendor-4 on-demand ids in the ZINC20 in-stock table: each id is
# reduced to its first run of digits (as a number) before the natural join.
# The result is not stored, only printed.
v4_ondemand |>
  mutate(zinc_id = as.double(str_extract(zinc_id, '\\d+'))) |>
  inner_join(y = zinc20_all_instock)

# Collect the contents of every ".uri" file in the in-stock data directory
# into a one-column table named `url` (the files are read without a header
# line).
zinc20_instock_url <-
  '~/append-ssd/ringtail-test/data/zinc20-all-instock-2d-smi/' |>
  # `pattern` is a regular expression: escape the dot so only names ending in
  # the literal extension ".uri" match.
  list.files(pattern = '\\.uri$', full.names = TRUE) |>
  read_csv(col_names = 'url')

# Print the table for inspection.
zinc20_instock_url

# Same for the "waitok" data directory.
zinc20_waitok_url <-
  '~/append-ssd/ringtail-test/data/zinc20-all-waitok-2d-smi/' |>
  list.files(pattern = '\\.uri$', full.names = TRUE) |>
  read_csv(col_names = 'url')

# continue vina -----------
# Full paths of every file in the vendor-4 in-stock PDBQT directory.
v4_ligands <-
  list.files('~/append-ssd/ringtail-test/data/zinc20-vendor4-instock-pdbqt',
             full.names = TRUE)

# Files whose name contains "out" are treated as finished results (presumably
# docking output named "<prefix>_out.pdbqt"). The test is applied to the file
# name only: list.files() returns tilde-expanded full paths, so matching the
# whole path would misclassify every file if a directory or user name
# happened to contain "out".
done_vina <-
  tibble(path = v4_ligands) |>
  filter(str_detect(basename(path), 'out')) |>
  # Strip the literal "_out.pdbqt" suffix (dot escaped, anchored at the end).
  mutate(prefix = basename(path) |> str_remove('_out\\.pdbqt$'))

# Print the finished results for inspection.
done_vina

# All remaining files are treated as inputs; their prefix is the file name
# without the literal ".pdbqt" extension.
input_vina <-
  tibble(path = v4_ligands) |>
  filter(!str_detect(basename(path), 'out')) |>
  mutate(prefix = basename(path) |> str_remove('\\.pdbqt$'))

# Inputs without a finished result form the to-do list. Only the path is
# written, cut down to the part starting at the first "zinc20", one per line
# and without a header.
input_vina |>
  filter(!(prefix %in% done_vina$prefix)) |>
  mutate(path = str_extract(path, 'zinc20.+'), .keep = 'none') |>
  write_csv('~/append-ssd/ringtail-test/data/v4_vina_todo.txt',
            col_names = FALSE)
